library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ggplot2)
# Download the two data.world extracts used throughout this script:
# one for chlamydia and one for AIDS deaths.
chlamydia <- read.csv(
  "https://query.data.world/s/cu0r246l5ljt4q3411q66hoq",
  header = TRUE
)
aids <- read.csv(
  "https://query.data.world/s/b75zwogm4rndt5lwhkmrfnhwc",
  header = TRUE
)
#chlamydia.backup = chlamydia
#aids.backup = aids

# Convert a column to real numbers regardless of how read.csv() parsed it.
#
# Calling as.numeric() directly on a factor returns the internal level codes
# (1, 2, 3, ...), not the values the labels spell out, so the column must go
# through as.character() first. Thousands separators are stripped (assumed to
# be commas if present -- TODO confirm against the raw file) and any entry that
# is not a plain number, such as "Data not available", becomes NA.
as_number <- function(x) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  txt <- gsub(",", "", trimws(as.character(x)), fixed = TRUE)
  is_plain_number <- grepl("^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)$", txt)
  txt[!is_plain_number] <- NA_character_
  as.numeric(txt)
}

chlamydia$Cases <- as_number(chlamydia$Cases)
chlamydia$Population <- as_number(chlamydia$Population)
aids$Cases <- as_number(aids$Cases)
aids$Population <- as_number(aids$Population)

# NOTE: the summary output captured below in this file was produced before the
# conversion above was corrected. Its Cases/Population statistics never exceed
# the row count, i.e. they are factor level codes rather than real counts.
summary(chlamydia)
## Indicator Year Geography FIPS
## Chlamydia:896 Min. :2000 Alabama : 16 Min. : 1.00
## 1st Qu.:2004 Alaska : 16 1st Qu.:17.75
## Median :2008 American Samoa: 16 Median :31.50
## Mean :2008 Arizona : 16 Mean :32.54
## 3rd Qu.:2011 Arkansas : 16 3rd Qu.:46.25
## Max. :2015 California : 16 Max. :78.00
## (Other) :800
## Race Sex Age.group
## All races/ethnicities:896 Both sexes:896 All age groups:896
##
##
##
##
##
##
## Misc Rate
## All transmission categories:896 Data not available: 32
## 306.5 : 3
## 487.5 : 3
## 151.9 : 2
## 152.8 : 2
## 154.7 : 2
## (Other) :852
## Cases Population
## Min. : 1.0 Min. : 1.0
## 1st Qu.:222.8 1st Qu.:210.8
## Median :440.5 Median :419.5
## Mean :440.7 Mean :420.4
## 3rd Qu.:660.2 3rd Qu.:632.2
## Max. :850.0 Max. :811.0
##
# Print per-column summaries of the AIDS data frame as a sanity check.
# NOTE(review): in the captured output below, Population runs from 1 to 840 on
# an 840-row table, which looks like factor level codes rather than real
# population figures -- confirm how Cases/Population were converted to numeric.
summary(aids)
## Indicator Year Geography FIPS
## AIDS deaths:840 Min. :2000 Alabama : 15 Min. : 1.00
## 1st Qu.:2003 Alaska : 15 1st Qu.:17.75
## Median :2007 American Samoa: 15 Median :31.50
## Mean :2007 Arizona : 15 Mean :32.54
## 3rd Qu.:2011 Arkansas : 15 3rd Qu.:46.25
## Max. :2014 California : 15 Max. :78.00
## (Other) :750
## Race Sex
## All races/ethnicities:840 Both sexes:840
##
##
##
##
##
##
## Age.group Misc
## Ages 13 years and older:840 All transmission categories:840
##
##
##
##
##
##
## Rate Cases Population
## Min. : 0.000 Min. : 1.0 Min. : 1.0
## 1st Qu.: 1.800 1st Qu.: 85.0 1st Qu.:210.8
## Median : 3.600 Median :208.0 Median :420.5
## Mean : 5.801 Mean :202.3 Mean :420.5
## 3rd Qu.: 6.700 3rd Qu.:307.0 3rd Qu.:630.2
## Max. :76.900 Max. :416.0 Max. :840.0
##
# Tag every row with the data set it came from so the two tables can be
# stacked and still told apart.
chlamydia$Disease <- "Chlamydia"
aids$Disease <- "AIDS"

# Parse a Rate column into numbers. The column is converted through
# as.character() so that a factor yields its labels rather than level codes;
# the "Data not available" placeholder (and any other non-numeric text)
# becomes NA.
parse_rate <- function(x) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  txt <- gsub(",", "", trimws(as.character(x)), fixed = TRUE)
  txt[!grepl("^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)$", txt)] <- NA_character_
  as.numeric(txt)
}

# Rate must have the same type in both tables before binding. It is
# non-numeric in chlamydia (it contains "Data not available") but numeric in
# aids; rbind()-ing the two as-is turned every AIDS rate into NA
# ("invalid factor level, NA generated").
chlamydia$Rate <- parse_rate(chlamydia$Rate)
aids$Rate <- parse_rate(aids$Rate)

df.all <- rbind(chlamydia, aids)
df.all$Disease <- as.factor(df.all$Disease)
# Year is treated as a discrete category from here on (used as a discrete
# x axis and as a grouping key).
df.all$Year <- as.factor(df.all$Year)
# Yearly totals per disease: summed Population, summed Cases, and the overall
# rate expressed as cases per 100 population.
df <- df.all %>%
  # Drop rows where either figure is missing so that the numerator and the
  # denominator of Rate are summed over exactly the same rows (and a single NA
  # does not turn a whole year's total into NA).
  filter(!is.na(Cases), !is.na(Population)) %>%
  group_by(Disease, Year) %>%
  summarise(
    Population = sum(Population),
    Cases = sum(Cases),
    # Cases and Population here already refer to the totals computed above.
    Rate = (100 * Cases) / Population
  ) %>%
  # summarise() only peels off the last grouping level; ungroup explicitly so
  # later operations on df are not silently performed per Disease.
  ungroup()

# Total population by year, one line per disease. Year is a factor, so the
# lines need an explicit group to be connected across the discrete axis.
p <- ggplot(df, aes(x = Year, y = Population, col = Disease))
p + geom_line(aes(group = Disease))

# Discrepancy between the Population totals of the two data sets: the AIDS series is almost exactly linear.
# Total population per Geography for each disease, restricted to the years
# both data sets cover (the AIDS data ends in 2014, chlamydia runs to 2015).
df <- df.all %>%
  # Year is a factor at this point, so as.numeric(Year) would return level
  # codes (1, 2, ...) and the comparison with 2014 would keep every row.
  # Convert through as.character() to compare the actual calendar year.
  filter(as.numeric(as.character(Year)) <= 2014) %>%
  # Ignore rows with no population figure so one NA does not blank a total.
  filter(!is.na(Population)) %>%
  group_by(Disease, Geography) %>%
  summarise(Population = sum(Population)) %>%
  ungroup()

# One panel per Geography, comparing the summed Population reported by each
# data set side by side.
p <- ggplot(df, aes(x = Disease, y = Population))
p + geom_bar(stat = "identity") + facet_wrap(~Geography)

library(trelliscopejs)
## Warning: replacing previous import by 'ggplot2::%+%' when loading
## 'trelliscopejs'
## Warning: replacing previous import by 'ggplot2::facet_wrap' when loading
## 'trelliscopejs'
# Trelliscope display of Rate against Year: one scatter panel per
# Geography/Disease combination, laid out 4 rows by 6 columns per page.
# ggplot() + geom_point() is the explicit equivalent of qplot(x, y, data = ),
# which is deprecated in recent ggplot2 releases.
ggplot(df.all, aes(x = Year, y = Rate)) +
  geom_point() +
  theme_bw() +
  facet_trelliscope(
    ~ Geography + Disease,
    nrow = 4,
    ncol = 6,
    self_contained = TRUE
  )